# Load config
source("src/config.R")
# Read integrated omics file
# rp <- readRDS("/s/project/mitoMultiOmics/multiOMICs_integration/processed_data/integration/patient_omics.RDS") %>% as.data.table()
rp <- readRDS(snakemake@input$patient_omics) %>% as.data.table()
rp <- rp[ gene_detected != "no RNA"]
rp <- rp[ !is.na(normcounts )]
ggplot(rp, aes(log10(normcounts), fill = gene_detected))+
geom_density(alpha = 0.4)+
theme_bw()+
ggtitle("RNA counts for non-detected proteins")

rm(rp)
# Get all protein coding genes
# genecode_v29 <- fread('/s/project/mitoMultiOmics/multiOMICs_integration/datasets/gene_annotation_v29.tsv')
genecode_v29 <- fread(snakemake@input$gencode_annotation)
genecode_v29[, geneID := toupper(gene_name)]
genecode_v29 <- genecode_v29[ , .(geneID , gene_type)]
genecode_v29 <- genecode_v29[!duplicated(genecode_v29)]
genecode_v29 <- genecode_v29[order(gene_type)]
genecode <- aggregate(genecode_v29[, -1], by= list(genecode_v29$geneID), paste)
setnames(genecode, c("geneID" , "gencode_v29") )
genecode$gencode_v29 <- as.character(genecode$gencode_v29)
genecode <- genecode[!duplicated(genecode), ]
rm(genecode_v29)
# Load disease genes table
# dis_genes <- fread('/s/project/mitoMultiOmics/multiOMICs_integration/datasets/disease_genes.tsv')
dis_genes <- fread(snakemake@input$disease_genes)
dis_genes <- dis_genes[ , .(geneID , DISEASE)]
dis_genes <- dis_genes[!duplicated(dis_genes)]
dis_genes <- dis_genes[order(DISEASE)]
dg <- aggregate(dis_genes[, -1], by= list(dis_genes$geneID), paste)
setnames(dg, c("geneID" , "disease") )
dg$disease <- as.character(dg$disease)
dg <- dg[!duplicated(dg), ]
rm(dis_genes)
allgenes <- merge(genecode, dg, by = "geneID", all.x = T )
rm(genecode, dg)
# Load list of genes, detected by RNS-seq
# detected_transcripts <- fread('/s/project/mitoMultiOmics/multiOMICs_integration/processed_data/integration/detected_transcripts.tsv')
detected_transcripts <- fread(snakemake@input$detected_transcripts)
detected_transcripts[once == T , fib_RNA := "once"]
detected_transcripts[half == T , fib_RNA := "half of the samples"]
detected_transcripts[all == T , fib_RNA := "all of the samples"]
detected_transcripts <- detected_transcripts[ , .(geneID, fib_RNA)]
detected_transcripts <- detected_transcripts[!duplicated(detected_transcripts)]
allgenes <- as.data.table( merge(allgenes, detected_transcripts, by = "geneID", all.x = T) )
allgenes[is.na(fib_RNA), fib_RNA := "not detected" ]
# Load list of genes, detected by proteomics
# detected_proteins <- fread('/s/project/mitoMultiOmics/multiOMICs_integration/processed_data/integration/detected_proteins.tsv')
detected_proteins <- fread(snakemake@input$detected_proteins)
detected_proteins[once == T , fib_protein := "once"]
detected_proteins[half == T , fib_protein := "half of the samples"]
detected_proteins[all == T , fib_protein := "all of the samples"]
detected_proteins <- detected_proteins[ , .(geneID, fib_protein)]
detected_proteins <- detected_proteins[!duplicated(detected_proteins)]
allgenes <- as.data.table( merge(allgenes, detected_proteins, by = "geneID", all.x = T) )
allgenes[is.na(fib_protein), fib_protein := "not detected" ]
# Load list of genes, detected by GTEx proteomics
# detected_proteins_gtex <- fread('/s/project/mitoMultiOmics/multiOMICs_integration/processed_data/integration/detected_proteins_gtex.tsv')
detected_proteins_gtex <- fread(snakemake@input$detected_proteins_gtex)
detected_proteins_gtex[once == T , protein := "once"]
detected_proteins_gtex[half == T , protein := "half of the samples"]
detected_proteins_gtex[all == T , protein := "all of the samples"]
detected_proteins_gtex <- detected_proteins_gtex[ , .(geneID, protein, TISSUE)]
detected_proteins_gtex <- detected_proteins_gtex[!is.na(protein)]
detected_proteins_gtex[ , gtex_protein := paste(protein , "in", TISSUE)]
detected_proteins_gtex <- detected_proteins_gtex[ , .(geneID, gtex_protein)]
detected_proteins_gtex <- detected_proteins_gtex[!duplicated(detected_proteins_gtex)]
dp_gtex <- aggregate(detected_proteins_gtex[, -1], by= list(detected_proteins_gtex$geneID), paste)
setnames(dp_gtex, c("geneID" , "gtex_protein") )
dp_gtex$gtex_protein <- as.character(dp_gtex$gtex_protein)
dp_gtex <- dp_gtex[!duplicated(dp_gtex), ]
rm(detected_proteins_gtex)
allgenes <- as.data.table( merge(allgenes, dp_gtex, by = "geneID", all.x = T) )
allgenes[is.na(gtex_protein), gtex_protein := "not detected" ]
allgenes <- allgenes[ !(fib_RNA == "not detected" & fib_protein == "not detected" & gtex_protein == "not detected" )]
allgenes <- allgenes[order(disease)]